{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RAG using Meta AI Llama-3\n",
    "\n",
    "\n",
    "<img src=\"./resources/rag_architecture.png\" width=800px>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "from dotenv import load_dotenv\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "from llama_index.core import Settings\n",
    "from llama_index.llms.ollama import Ollama\n",
    "from llama_index.core import PromptTemplate\n",
    "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
    "from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader\n",
    "\n",
    "from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
    "from llama_index.core import Settings\n",
    "import qdrant_client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# allows nested access to the event loop\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add your documents in this directory, you can drag & drop\n",
    "input_dir_path = '/teamspace/studios/this_studio/test-dir'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection_name=\"chat_with_docs\"\n",
    "\n",
    "client = qdrant_client.QdrantClient(\n",
    "    host=\"localhost\",\n",
    "    port=6333\n",
    ")\n",
    "\n",
    "def create_index(documents):\n",
    "    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)\n",
    "    storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "    index = VectorStoreIndex.from_documents(\n",
    "        documents,\n",
    "        storage_context=storage_context,\n",
    "    )\n",
    "    return index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7b4ba9e36b4e47b982be21b95b24a181",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bf2ebc67bf4a4caf8c6292b80f869b7c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8e41ff80db1a44a1ac3dc99fc477a819",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "17460d4930c241c8a7af9208b82d1310",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1418bcfbba844062a80299a82f04d21d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f73ccdc9f6be4b9e9e5d69d3de936ec1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "# setup llm & embedding model\n",
    "llm=Ollama(model=\"llama3.3\", request_timeout=120.0)\n",
    "# embed_model = HuggingFaceEmbedding( model_name=\"Snowflake/snowflake-arctic-embed-m\", trust_remote_code=True)\n",
    "embed_model = HuggingFaceEmbedding( model_name=\"BAAI/bge-large-en-v1.5\", trust_remote_code=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9f486b6a1da4f15bb0e43469fa8c420",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Parsing nodes:   0%|          | 0/17 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "363a055481fb4d808da9551727ee5307",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating embeddings:   0%|          | 0/26 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# load data\n",
    "loader = SimpleDirectoryReader(\n",
    "            input_dir = input_dir_path,\n",
    "            required_exts=[\".pdf\"],\n",
    "            recursive=True\n",
    "        )\n",
    "docs = loader.load_data()\n",
    "\n",
    "# Creating an index over loaded data\n",
    "Settings.embed_model = embed_model\n",
    "try:\n",
    "    index = create_index(docs)\n",
    "    print('Using Qdrant collection')\n",
    "except:\n",
    "    index = VectorStoreIndex.from_documents(docs, show_progress=True)\n",
    "\n",
    "# Create the query engine, where we use a cohere reranker on the fetched nodes\n",
    "Settings.llm = llm\n",
    "query_engine = index.as_query_engine()\n",
    "\n",
    "# ====== Customise prompt template ======\n",
    "qa_prompt_tmpl_str = (\n",
    "\"Context information is below.\\n\"\n",
    "\"---------------------\\n\"\n",
    "\"{context_str}\\n\"\n",
    "\"---------------------\\n\"\n",
    "\"Given the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don't know the answer say 'I don't know!'.\\n\"\n",
    "\"Query: {query_str}\\n\"\n",
    "\"Answer: \"\n",
    ")\n",
    "qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)\n",
    "\n",
    "query_engine.update_prompts(\n",
    "    {\"response_synthesizer:text_qa_template\": qa_prompt_tmpl}\n",
    ")\n",
    "\n",
    "# Generate the response\n",
    "response = query_engine.query(\"What exactly is DSPy?\",)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "DSPy is a framework for programmatically solving advanced tasks with language and retrieval models through composing and declaring modules. It aims to replace brittle \"prompt engineering\" tricks with composable modules and automatic optimizers, allowing developers to define signatures that specify what a language model (LM) needs to do declaratively."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(str(response)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ❗️❗️ Make sure you clear GPU memory by clicking on Restart button above, if you want to use Streamlit from here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sat Dec  7 08:31:49 2024       \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 535.216.03             Driver Version: 535.216.03   CUDA Version: 12.2     |\n",
      "|-----------------------------------------+----------------------+----------------------+\n",
      "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
      "|                                         |                      |               MIG M. |\n",
      "|=========================================+======================+======================|\n",
      "|   0  NVIDIA L4                      Off | 00000000:35:00.0 Off |                    0 |\n",
      "| N/A   36C    P0              31W /  72W |  19895MiB / 23034MiB |      0%      Default |\n",
      "|                                         |                      |                  N/A |\n",
      "+-----------------------------------------+----------------------+----------------------+\n",
      "                                                                                         \n",
      "+---------------------------------------------------------------------------------------+\n",
      "| Processes:                                                                            |\n",
      "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
      "|        ID   ID                                                             Usage      |\n",
      "|=======================================================================================|\n",
      "+---------------------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "# check GPU usage\n",
    "\n",
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
